home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Format CD 43
/
Amiga Format CD43 (1999)(Future Publishing)(GB)(Track 1 of 2)[!][issue 1999-09].iso
/
-serious-
/
misc
/
kc
/
original
/
kc.c
< prev
next >
Wrap
C/C++ Source or Header
|
1999-06-14
|
31KB
|
1,721 lines
/*
* kc.c : kanji code convert ( Version 1.8 ).
* Copyright(C) 1989-95 MUKAWA,Susumu mukawa@ctec.tn-sec.ntt.jp
* Permit anyone to use, modify, redistribute this software.
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ctypes.h"
/* Control characters recognised in the input stream. */
#define ESC '\033'
#define SO '\016' /* ConvertMain switches kana input on when it reads this */
#define SI '\017' /* ConvertMain switches kana input off when it reads this */
/* Escape sequences written by JisKanjiSeq / JisAsciiSeq / JisKanaSeq,
 * grouped by output code (JIS2, JIS3, JIS4, CT).
 * JIS3 has no kana sequence.
 */
#define JIS2_KANJI "\033$@"
#define JIS2_ASCII "\033(J"
#define JIS2_KANA "\033(I"
#define JIS3_KANJI "\033$B"
#define JIS3_ASCII "\033(J"
#define JIS4_KANJI "\033$B"
#define JIS4_ASCII "\033(B"
#define JIS4_KANA "\033(I"
#define CT_KANJI "\033$(B"
#define CT_ASCII "\033(B"
#define CT_KANA "\033)I"
#define CT_LATIN1 "\033-A" /* written by the ctext writers when kana output ends */
/* Kanji code identifiers stored in kanjiInCode / kanjiOutCode. */
#define ILLEGAL (-2) /* CodeCheck found ESC ( H */
#define UNKNOWN (-1) /* not (yet) determined */
#define JIS (0) /* input side only: set for options 'J', 'j' and 'o' */
#define JIS2 (2) /* output side: option 'o' */
#define JIS3 (3) /* output side: option 'J' */
#define JIS4 (4) /* output side: option 'j' */
#define SJIS (5)
#define EUC (6)
#define CT (7) /* option 'c' */
#define VAGUENESS (100) /* NOTE(review): not referenced in the visible code */
/* Output shift states stored in modeFlag. */
#define ASCII_MODE (0)
#define KANJI_MODE (1)
#define KANA_MODE (2)
#define CHECK_SIZE (1024) /* code check default size */
int bufSize = CHECK_SIZE; /* element count of ungetBuf; changed by option -b */
int kanjiInCode = UNKNOWN; /* input stream kanji code. */
int kanjiOutCode = SJIS; /* output stream kanji code.
* default code is shift jis. */
int bit7 = 0; /* jis is 7 or 8 bit flag. */
int kanaInFlag = 0; /* input is shifted to kana (set on SO or ESC ( I) */
int modeFlag = ASCII_MODE; /* used in jis or ctext conver func */
int kanaFlag = 0; /* ctext writers: kana sequence has been written */
int ungetCount = 0; /* unget buffer current point. */
int ungetCount2 = 0; /* number of bytes recorded by fGetc2; nonzero means replay mode */
int *ungetBuf; /* my unget buffer. */
void (*kanjiOutFunc)() = NULL; /* kanji convert function. */
int verboseMode = 0; /* display status ? */
int codeCheckOnly = 0; /* option -k: report the detected code, do not convert */
int getIllegalSeq = 0; /* set when ESC ( H was seen */
/* Write the escape sequence that selects two-byte kanji for the
 * current output code.  Output codes other than JIS2, JIS3, JIS4
 * and CT write nothing.
 */
void JisKanjiSeq()
{
    const char *seq = NULL;

    if (kanjiOutCode == JIS2)
        seq = JIS2_KANJI;
    else if (kanjiOutCode == JIS3)
        seq = JIS3_KANJI;
    else if (kanjiOutCode == JIS4)
        seq = JIS4_KANJI;
    else if (kanjiOutCode == CT)
        seq = CT_KANJI;

    if (seq != NULL)
        fputs(seq, stdout);
}
/* Write the escape sequence that selects the single-byte
 * ASCII/roman set for the current output code.  Output codes other
 * than JIS2, JIS3, JIS4 and CT write nothing.
 */
void JisAsciiSeq()
{
    const char *seq = NULL;

    if (kanjiOutCode == JIS2)
        seq = JIS2_ASCII;
    else if (kanjiOutCode == JIS3)
        seq = JIS3_ASCII;
    else if (kanjiOutCode == JIS4)
        seq = JIS4_ASCII;
    else if (kanjiOutCode == CT)
        seq = CT_ASCII;

    if (seq != NULL)
        fputs(seq, stdout);
}
/* Write the escape sequence that selects kana for the current
 * output code.  JIS3 has no kana sequence, so nothing is written
 * for it (nor for SJIS / EUC).
 */
void JisKanaSeq()
{
    const char *seq;

    switch (kanjiOutCode) {
    case JIS2: seq = JIS2_KANA; break;
    case JIS4: seq = JIS4_KANA; break;
    case CT:   seq = CT_KANA;   break;
    default:   seq = NULL;      break;  /* includes JIS3 */
    }

    if (seq != NULL)
        fputs(seq, stdout);
}
/* ctext -> ctext writer.
 * c1 != 0          : two-byte kanji (c1, c2).
 * c1 == 0, c2 == 0 : end of stream, restore the initial state.
 * c1 == 0, c2 != 0 : single byte (kana or ASCII/control).
 */
void CtoC(c1, c2)
register int c1, c2;
{
    if (c1 != 0) {
        /* kanji */
        if (modeFlag != KANJI_MODE) {
            JisKanjiSeq();
            modeFlag = KANJI_MODE;
        }
        putchar(c1);
        putchar(c2);
        return;
    }

    if (c2 == 0) {
        /* cleanup at end of stream */
        if (modeFlag == KANJI_MODE)
            JisAsciiSeq();
        if (kanaFlag) {
            fputs(CT_LATIN1, stdout);   /* put Latin-1 back into G1 */
            kanaFlag = 0;
        }
        modeFlag = ASCII_MODE;
        return;
    }

    if (iskana(c2)) {
        if (!kanaFlag) {
            JisKanaSeq();
            kanaFlag = 1;
        }
        putchar(c2);
        return;
    }

    /* ASCII or control character */
    if (modeFlag != ASCII_MODE) {
        JisAsciiSeq();
        modeFlag = ASCII_MODE;
    }
    if (kanaFlag && c2 == '\n') {
        fputs(CT_LATIN1, stdout);       /* put Latin-1 back into G1 */
        kanaFlag = 0;
    }
    putchar(c2);
}
/* ctext -> JIS writer.
 * c1 != 0          : two-byte kanji (c1, c2).
 * c1 == 0, c2 == 0 : end of stream, return to ASCII.
 * c1 == 0, c2 != 0 : single byte (kana or ASCII/control).
 */
void CtoJ(c1, c2)
register int c1, c2;
{
    if (c1 != 0) {
        /* kanji */
        if (modeFlag != KANJI_MODE) {
            JisKanjiSeq();
            modeFlag = KANJI_MODE;
        }
        putchar(c1);
        putchar(c2);
        return;
    }

    if (c2 == 0) {
        /* cleanup at end of stream */
        int needAscii = (modeFlag == KANJI_MODE) ||
                        (modeFlag == KANA_MODE && bit7);

        if (needAscii)
            JisAsciiSeq();
        modeFlag = ASCII_MODE;
        return;
    }

    if (iskana(c2)) {
        if (modeFlag != KANA_MODE) {
            JisKanaSeq();
            modeFlag = KANA_MODE;
        }
        /* 7-bit output strips the high bit, 8-bit output forces it */
        c2 = bit7 ? (c2 & 0x7f) : (c2 | 0x80);
        putchar(c2);
        return;
    }

    if (modeFlag != ASCII_MODE) {
        JisAsciiSeq();
        modeFlag = ASCII_MODE;
    }
    putchar(c2);
}
/* ctext -> Shift-JIS writer.
 * A non-zero c1 marks a two-byte kanji whose JIS code (c1, c2) is
 * re-encoded as Shift-JIS; otherwise c2 is written unchanged.
 */
void CtoS(c1, c2)
register int c1, c2;
{
    int hi, lo;

    if (c1 == 0) {
        /* single-byte character */
        putchar(c2);
        return;
    }

    /* kanji */
    hi = c1 & 0xff;
    lo = c2 & 0xff;
    lo += (hi & 1) ? 0x1f : 0x7d;
    if (lo >= 0x7f)
        lo++;
    hi = ((hi - 0x21) >> 1) + 0x81;
    if (hi > 0x9f)
        hi += 0x40;
    putchar(hi);
    putchar(lo);
}
/* ctext -> EUC writer.
 * Kanji bytes get their high bit set; a single-byte kana is
 * preceded by 0x8e; anything else is written as is.
 */
void CtoE(c1, c2)
register int c1, c2;
{
    if (c1 != 0) {
        /* kanji */
        putchar(c1 | 0x80);
        putchar(c2 | 0x80);
        return;
    }

    /* single-byte character */
    if (iskana(c2))
        putchar(0x8e);
    putchar(c2);
}
/* JIS -> JIS writer.
 * c1 != 0          : two-byte kanji (c1, c2).
 * c1 == 0, c2 == 0 : end of stream, return to ASCII.
 * c1 == 0, c2 != 0 : single byte; treated as kana while kanaInFlag
 *                    is set, otherwise as ASCII/control.
 */
void JtoJ(c1, c2)
register int c1, c2;
{
    if (c1 != 0) {
        /* kanji */
        if (modeFlag != KANJI_MODE) {
            JisKanjiSeq();
            modeFlag = KANJI_MODE;
        }
        putchar(c1);
        putchar(c2);
        return;
    }

    if (c2 == 0) {
        /* cleanup at end of stream */
        int needAscii = (modeFlag == KANJI_MODE) ||
                        (modeFlag == KANA_MODE && bit7);

        if (needAscii)
            JisAsciiSeq();
        modeFlag = ASCII_MODE;
        return;
    }

    if (kanaInFlag) {
        if (modeFlag != KANA_MODE) {
            JisKanaSeq();
            modeFlag = KANA_MODE;
        }
        c2 = bit7 ? (c2 & 0x7f) : (c2 | 0x80);
        putchar(c2);
        return;
    }

    if (modeFlag != ASCII_MODE) {
        JisAsciiSeq();
        modeFlag = ASCII_MODE;
    }
    putchar(c2);
}
/* JIS -> ctext writer.
 * c1 != 0          : two-byte kanji (c1, c2).
 * c1 == 0, c2 == 0 : end of stream, restore the initial state.
 * c1 == 0, c2 != 0 : single byte; kana while kanaInFlag is set
 *                    (written with the high bit on), else ASCII.
 */
void JtoC(c1, c2)
register int c1, c2;
{
    if (c1 != 0) {
        /* kanji */
        if (modeFlag != KANJI_MODE) {
            JisKanjiSeq();
            modeFlag = KANJI_MODE;
        }
        putchar(c1);
        putchar(c2);
        return;
    }

    if (c2 == 0) {
        /* cleanup at end of stream */
        if (modeFlag == KANJI_MODE)
            JisAsciiSeq();
        if (kanaFlag) {
            fputs(CT_LATIN1, stdout);   /* put Latin-1 back into G1 */
            kanaFlag = 0;
        }
        modeFlag = ASCII_MODE;
        return;
    }

    if (kanaInFlag) {
        if (!kanaFlag) {
            JisKanaSeq();
            kanaFlag = 1;
        }
        putchar(c2 | 0x80);
        return;
    }

    /* ASCII or control character */
    if (modeFlag != ASCII_MODE) {
        JisAsciiSeq();
        modeFlag = ASCII_MODE;
    }
    if (kanaFlag && c2 == '\n') {
        fputs(CT_LATIN1, stdout);       /* put Latin-1 back into G1 */
        kanaFlag = 0;
    }
    putchar(c2);
}
/* JIS -> Shift-JIS writer.
 * A non-zero c1 marks a two-byte kanji, re-encoded as Shift-JIS.
 * A single byte gets its high bit set while kanaInFlag is on.
 */
void JtoS(c1, c2)
register int c1, c2;
{
    int hi, lo;

    if (c1 == 0) {
        /* single-byte character */
        putchar(kanaInFlag ? (c2 | 0x80) : c2);
        return;
    }

    /* kanji */
    hi = c1 & 0xff;
    lo = c2 & 0xff;
    lo += (hi & 1) ? 0x1f : 0x7d;
    if (lo >= 0x7f)
        lo++;
    hi = ((hi - 0x21) >> 1) + 0x81;
    if (hi > 0x9f)
        hi += 0x40;
    putchar(hi);
    putchar(lo);
}
/* JIS -> EUC writer.
 * Kanji bytes get their high bit set.  A single byte is written as
 * 0x8e + (byte | 0x80) while kanaInFlag is on, as 0x8e + byte when
 * it is already an 8-bit kana, and unchanged otherwise.
 */
void JtoE(c1, c2)
register int c1, c2;
{
    if (c1 != 0) {
        /* kanji */
        putchar(c1 | 0x80);
        putchar(c2 | 0x80);
        return;
    }

    /* single-byte character */
    if (kanaInFlag) {
        putchar(0x8e);
        putchar(c2 | 0x80);
        return;
    }
    if (iskana(c2))
        putchar(0x8e);
    putchar(c2);
}
/* Shift-JIS -> JIS writer.
 * c1 != 0 : two-byte kanji, re-encoded from Shift-JIS to JIS.
 * c1 == 0 : single byte.  A zero c2 (end of stream) only returns
 *           the output to ASCII and writes no character.
 */
void StoJ(c1, c2)
register int c1, c2;
{
    int hi, lo;

    if (c1 == 0) {
        /* single-byte character */
        if (iskana(c2)) {
            if (modeFlag != KANA_MODE) {
                JisKanaSeq();
                modeFlag = KANA_MODE;
            }
            if (bit7)
                c2 &= 0x7f;
        }
        else if (modeFlag != ASCII_MODE) {
            JisAsciiSeq();
            modeFlag = ASCII_MODE;
        }
        if (c2 != 0)
            putchar(c2);
        return;
    }

    /* kanji */
    if (modeFlag != KANJI_MODE) {
        JisKanjiSeq();
        modeFlag = KANJI_MODE;
    }
    hi = c1 & 0xff;             /* Shift-JIS first byte */
    lo = c2 & 0xff;             /* Shift-JIS second byte */
    hi -= (hi < 0xa0) ? 0x71 : 0xb1;
    hi = (hi << 1) + 1;
    if (lo > 0x7f)
        lo--;
    if (lo > 0x9d) {
        lo -= 0x7d;
        hi++;
    }
    else {
        lo -= 0x1f;
    }
    putchar(hi);
    putchar(lo);
}
/* Shift-JIS -> ctext writer.
 * c1 != 0          : two-byte kanji, re-encoded from Shift-JIS.
 * c1 == 0, c2 == 0 : end of stream, restore the initial state.
 * c1 == 0, c2 != 0 : single byte (kana or ASCII/control).
 */
void StoC(c1, c2)
register int c1, c2;
{
    int hi, lo;

    if (c1 != 0) {
        /* kanji */
        if (modeFlag != KANJI_MODE) {
            JisKanjiSeq();
            modeFlag = KANJI_MODE;
        }
        hi = c1 & 0xff;         /* Shift-JIS first byte */
        lo = c2 & 0xff;         /* Shift-JIS second byte */
        hi -= (hi < 0xa0) ? 0x71 : 0xb1;
        hi = (hi << 1) + 1;
        if (lo > 0x7f)
            lo--;
        if (lo > 0x9d) {
            lo -= 0x7d;
            hi++;
        }
        else {
            lo -= 0x1f;
        }
        putchar(hi);
        putchar(lo);
        return;
    }

    if (c2 == 0) {
        /* cleanup at end of stream */
        if (modeFlag == KANJI_MODE)
            JisAsciiSeq();
        if (kanaFlag) {
            fputs(CT_LATIN1, stdout);   /* put Latin-1 back into G1 */
            kanaFlag = 0;
        }
        modeFlag = ASCII_MODE;
        return;
    }

    if (iskana(c2)) {
        if (!kanaFlag) {
            JisKanaSeq();
            kanaFlag = 1;
        }
        putchar(c2);
        return;
    }

    /* ASCII character */
    if (modeFlag == KANJI_MODE) {
        JisAsciiSeq();
        modeFlag = ASCII_MODE;
    }
    if (kanaFlag && c2 == '\n') {
        fputs(CT_LATIN1, stdout);       /* put Latin-1 back into G1 */
        kanaFlag = 0;
    }
    putchar(c2);
}
/* Shift-JIS -> Shift-JIS writer: a pass-through.
 * Writes c1 first when it is non-zero (two-byte kanji), then c2.
 */
void StoS(c1, c2)
register int c1, c2;
{
    if (c1 != 0)
        putchar(c1);    /* first byte of a kanji */
    putchar(c2);
}
/* Shift-JIS -> EUC writer.
 * Kanji are re-encoded to JIS and written with the high bit set;
 * a single-byte kana is preceded by 0x8e; other bytes pass through.
 */
void StoE(c1, c2)
register int c1, c2;
{
    int hi, lo;

    if (c1 == 0) {
        /* single-byte character */
        if (iskana(c2))
            putchar(0x8e);
        putchar(c2);
        return;
    }

    /* kanji */
    hi = c1 & 0xff;             /* Shift-JIS first byte */
    lo = c2 & 0xff;             /* Shift-JIS second byte */
    hi -= (hi < 0xa0) ? 0x71 : 0xb1;
    hi = (hi << 1) + 1;
    if (lo > 0x7f)
        lo--;
    if (lo > 0x9d) {
        lo -= 0x7d;
        hi++;
    }
    else {
        lo -= 0x1f;
    }
    putchar(hi | 0x80);
    putchar(lo | 0x80);
}
/* EUC -> JIS writer.
 * c1 == 0, c2 == 0 : end of stream, return to ASCII.
 * c1 == 0, c2 != 0 : ASCII/control character.
 * c1 is the EUC kana prefix : c2 is a kana byte.
 * otherwise        : two-byte kanji (c1, c2), high bits stripped.
 */
void EtoJ(c1, c2)
register int c1, c2;
{
    if (c1 == 0 && c2 == 0) {
        /* cleanup at end of stream */
        int needAscii = (modeFlag == KANJI_MODE) ||
                        (modeFlag == KANA_MODE && bit7);

        if (needAscii)
            JisAsciiSeq();
        modeFlag = ASCII_MODE;
        return;
    }

    if (c1 == 0) {
        /* ASCII character */
        if (modeFlag != ASCII_MODE) {
            JisAsciiSeq();
            modeFlag = ASCII_MODE;
        }
        putchar(c2);
        return;
    }

    if (isEkana1(c1)) {
        if (bit7) {
            /* 7-bit output: shift to kana and drop the high bit */
            if (modeFlag != KANA_MODE) {
                JisKanaSeq();
                modeFlag = KANA_MODE;
            }
            c2 &= 0x7f;
        }
        else if (modeFlag != ASCII_MODE) {
            /* 8-bit output: kana byte is written from ASCII mode */
            JisAsciiSeq();
            modeFlag = ASCII_MODE;
        }
        putchar(c2);
        return;
    }

    /* kanji */
    if (modeFlag != KANJI_MODE) {
        JisKanjiSeq();
        modeFlag = KANJI_MODE;
    }
    putchar(c1 & 0x7f);
    putchar(c2 & 0x7f);
}
/* EUC -> ctext writer.
 * c1 == 0, c2 == 0 : end of stream, restore the initial state.
 * c1 == 0, c2 != 0 : ASCII/control character.
 * c1 is the EUC kana prefix : c2 is a kana byte, written as is.
 * otherwise        : two-byte kanji (c1, c2), high bits stripped.
 */
void EtoC(c1, c2)
register int c1, c2;
{
    if (c1 == 0 && c2 == 0) {
        /* cleanup at end of stream */
        if (modeFlag == KANJI_MODE)
            JisAsciiSeq();
        if (kanaFlag) {
            fputs(CT_LATIN1, stdout);   /* put Latin-1 back into G1 */
            kanaFlag = 0;
        }
        modeFlag = ASCII_MODE;
        return;
    }

    if (c1 == 0) {
        /* ASCII character */
        if (modeFlag != ASCII_MODE) {
            JisAsciiSeq();
            modeFlag = ASCII_MODE;
        }
        if (kanaFlag && c2 == '\n') {
            fputs(CT_LATIN1, stdout);   /* put Latin-1 back into G1 */
            kanaFlag = 0;
        }
        putchar(c2);
        return;
    }

    if (isEkana1(c1)) {
        if (!kanaFlag) {
            JisKanaSeq();
            kanaFlag = 1;
        }
        putchar(c2);
        return;
    }

    /* kanji */
    if (modeFlag != KANJI_MODE) {
        JisKanjiSeq();
        modeFlag = KANJI_MODE;
    }
    putchar(c1 & 0x7f);
    putchar(c2 & 0x7f);
}
/* EUC -> Shift-JIS writer.
 * Single bytes and kana (c1 is the EUC kana prefix) write c2 only;
 * kanji are stripped to JIS and re-encoded as Shift-JIS.
 */
void EtoS(c1, c2)
register int c1, c2;
{
    int hi, lo;

    if (c1 == 0 || isEkana1(c1)) {
        /* single-byte character or kana */
        putchar(c2);
        return;
    }

    /* kanji */
    hi = c1 & 0x7f;
    lo = c2 & 0x7f;
    lo += (hi & 1) ? 0x1f : 0x7d;
    if (lo >= 0x7f)
        lo++;
    hi = ((hi - 0x21) >> 1) + 0x81;
    if (hi > 0x9f)
        hi += 0x40;
    putchar(hi);
    putchar(lo);
}
/* EUC -> EUC writer: a pass-through.
 * Writes c1 first when it is non-zero (two-byte character), then c2.
 */
void EtoE(c1, c2)
register int c1, c2;
{
    if (c1 != 0)
        putchar(c1);    /* first byte of a two-byte character */
    putchar(c2);
}
/* Writer used when the input code is unknown: copies the bytes
 * unchanged, skipping any argument that is zero.
 */
void NoConv(c1, c2)
register int c1, c2;
{
    if (c1 != 0)
        putchar(c1);
    if (c2 != 0)
        putchar(c2);
}
/* Read the next input byte, honouring the private unget buffer.
 *
 * While ungetCount2 is non-zero the buffer is replayed front to
 * back (bytes recorded by fGetc2); once it is exhausted both
 * counters are cleared and reading continues from fp.  Otherwise
 * the buffer acts as a stack filled by unGetc.
 */
fGetc(fp)
FILE *fp;
{
    if (ungetCount2 == 0) {
        /* stack mode */
        if (ungetCount > 0)
            return(ungetBuf[--ungetCount]);
        return(fgetc(fp));
    }

    /* replay mode */
    if (ungetCount < ungetCount2)
        return(ungetBuf[ungetCount++]);

    ungetCount = 0;
    ungetCount2 = 0;
    return(fgetc(fp));
}
/* Give a byte back to fGetc.
 * In replay mode (ungetCount2 != 0) the read position simply steps
 * back one slot and c is not stored.  In stack mode c is pushed,
 * and silently discarded when the buffer already holds bufSize
 * entries.
 */
void unGetc(c)
int c;
{
    if (ungetCount2 != 0) {
        --ungetCount;
        return;
    }
    if (ungetCount < bufSize) {
        ungetBuf[ungetCount] = c;
        ++ungetCount;
    }
}
/* Select the writer function for the current kanjiInCode /
 * kanjiOutCode pair.  kanjiOutFunc is left untouched when either
 * code is not one of the handled values (e.g. input still UNKNOWN).
 * For SJIS and EUC output, ctext input shares the JIS writers.
 */
void SetKanjiOutFunc()
{
    int jisFamilyOut = (kanjiOutCode == JIS2 || kanjiOutCode == JIS3 ||
                        kanjiOutCode == JIS4);

    if (jisFamilyOut) {
        if (kanjiInCode == JIS)
            kanjiOutFunc = JtoJ;
        else if (kanjiInCode == CT)
            kanjiOutFunc = CtoJ;
        else if (kanjiInCode == SJIS)
            kanjiOutFunc = StoJ;
        else if (kanjiInCode == EUC)
            kanjiOutFunc = EtoJ;
    }
    else if (kanjiOutCode == CT) {
        if (kanjiInCode == JIS)
            kanjiOutFunc = JtoC;
        else if (kanjiInCode == CT)
            kanjiOutFunc = CtoC;
        else if (kanjiInCode == SJIS)
            kanjiOutFunc = StoC;
        else if (kanjiInCode == EUC)
            kanjiOutFunc = EtoC;
    }
    else if (kanjiOutCode == SJIS) {
        if (kanjiInCode == JIS || kanjiInCode == CT)
            kanjiOutFunc = JtoS;
        else if (kanjiInCode == SJIS)
            kanjiOutFunc = StoS;
        else if (kanjiInCode == EUC)
            kanjiOutFunc = EtoS;
    }
    else if (kanjiOutCode == EUC) {
        if (kanjiInCode == JIS || kanjiInCode == CT)
            kanjiOutFunc = JtoE;
        else if (kanjiInCode == SJIS)
            kanjiOutFunc = StoE;
        else if (kanjiInCode == EUC)
            kanjiOutFunc = EtoE;
    }
}
/* Record the line number of the first ambiguous line.
 * *errorLine is written only while it is still 0, so later calls
 * keep the earliest value.
 */
void setFirstErrorLine(errorLine, line)
int *errorLine;
int line;
{
    if (*errorLine != 0)
        return;         /* an earlier line is already recorded */
    *errorLine = line;
}
fGetc2(fp)
FILE *fp;
{
int c;
if(ungetCount) {
c = ungetBuf[ungetCount2-ungetCount];
--ungetCount;
return(c);
}
c = fgetc(fp);
if(c == -1)
return(-1);
ungetBuf[ungetCount2++] = c;
#ifdef DEBUG
if(c != -1)
fprintf(stderr, "[%02x]", c);
#endif
return(c);
}
/* Give the most recently read byte back to fGetc2.
 * The byte is still in ungetBuf, so only the count of bytes to
 * re-deliver is raised; the argument itself is not used.  Nothing
 * happens while no byte has been recorded.
 */
void unGetc2(c)
int c;
{
    if (ungetCount2 == 0)
        return;
    ++ungetCount;
}
/* Guess the kanji code of the stream by looking ahead through fGetc2.
 *
 * Bytes that were read are kept in ungetBuf for later replay by fGetc.
 * Plain ASCII bytes seen before any hint was found are dropped from the
 * buffer again; unless codeCheckOnly is set they are written to stdout
 * straight away.
 *
 * Returns JIS, CT, SJIS or EUC once a decisive byte pattern is found,
 * ILLEGAL for ESC ( H, and otherwise the tentative guess (kCodeMaybe),
 * which is UNKNOWN when no hint was seen.
 *
 * NOTE(review): several paths read bytes without lowering chksize
 * (e.g. ESC $ ( followed by something other than 'B', or a byte above
 * 0x7f that matches none of the tests), so chksize alone does not bound
 * the number of bytes recorded by fGetc2.
 */
CodeCheck(fp)
FILE *fp;
{
register int c, cc, chksize, ccc, cccc;
int kCode = UNKNOWN, kCodeMaybe = UNKNOWN;
ungetCount = ungetCount2 = 0;
chksize = bufSize;
while(chksize > 0 && (c = fGetc2(fp)) != EOF) {
if(c > 0x7f) {
if(kCodeMaybe != SJIS && isEkanji(c)) {
if((cc = fGetc2(fp)) == EOF) {
if(iskana(c)) {
/* The file ended before SJIS and
 * 8-bit JIS could be told apart.
 * Nothing else to go on, so
 * judge it to be SJIS... */
kCode = SJIS;
break;
}
/* Absurd... the file can only
 * be corrupted... */
break;
}
else if(isEkanji2(cc)) {
if(isSkanji(c) && isSkanji2(cc)) {
/* Cannot tell EUC from SJIS kanji.
 * Tentatively assume EUC and
 * keep looking... */
#ifdef DEBUG
fprintf(stderr, "EUC?\n");
#endif
kCodeMaybe = EUC;
chksize -= 2;
continue;
}
else if(iskana(c) && iskana(cc)) {
/* Looks like EUC, but it may
 * just be two 8-bit kana
 * in a row, so
 * tentatively assume EUC and
 * keep looking... */
#ifdef DEBUG
fprintf(stderr, "EUC?\n");
#endif
kCodeMaybe = EUC;
chksize -= 2;
continue;
}
else {
/* EUC, decided!! */
kCode = EUC;
break;
}
}
else if(iskana(c)) {
/* The second byte is not an EUC kanji
 * byte, so this looks like SJIS kana,
 * but it may be 8-bit JIS kana, so
 * tentatively assume SJIS and
 * keep looking... */
kCodeMaybe = SJIS;
#ifdef DEBUG
fprintf(stderr, "SJIS?\n");
#endif
unGetc2(cc);
--chksize;
continue;
}
}
else if(iskana(c)) {
/* Looks like SJIS, but it may be
 * 8-bit JIS kana,
 * so keep looking... */
kCodeMaybe = SJIS;
#ifdef DEBUG
fprintf(stderr, "SJIS?\n");
#endif
--chksize;
continue;
}
else if(isEkana1(c)) {
if((cc = fGetc2(fp)) == EOF) {
/* Absurd... the file can only
 * be corrupted... */
break;
}
else if(isEkana2(cc)) {
/* EUC, decided!! */
kCode = EUC;
break;
}
else {
/* Is the file corrupted? */
unGetc2(cc);
--chksize;
continue;
}
}
else if(isSkanji(c)) {
if((cc = fGetc2(fp)) == EOF) {
/* Absurd... the file can only
 * be corrupted... */
break;
}
else if(isSkanji2(cc)) {
/* SJIS, decided!! */
kCode = SJIS;
break;
}
else {
/* Is the file corrupted? */
unGetc2(cc);
--chksize;
continue;
}
}
}
else if(c == ESC) {
if((cc = fGetc2(fp)) == EOF) {
/* Is the file corrupted? */
break;
}
else if(cc == '$') {
if((ccc = fGetc2(fp)) == EOF) {
/* Is the file corrupted? */
unGetc2(cc);
--chksize;
continue;
}
else if(ccc == '@' || ccc == 'B') {
/* JIS, decided!! */
kCode = JIS;
break;
}
else if(ccc == '(') {
if((cccc = fGetc2(fp)) == EOF) {
/* Is the file corrupted? */
unGetc2(ccc);
unGetc2(cc);
--chksize;
continue;
}
else if(cccc == 'B') {
/* CTEXT, decided!! */
kCode = CT;
break;
}
}
else {
unGetc2(cc);
unGetc2(ccc);
--chksize;
continue;
}
}
else if(cc == '(') {
if((ccc = fGetc2(fp)) == EOF) {
/* Is the file corrupted? */
unGetc2(cc);
--chksize;
continue;
}
else if(ccc == 'H') {
/* ESC(H is a mistaken sequence;
 * it really designates the Swedish character set.
 */
kCode = ILLEGAL;
break;
}
else if(ccc == 'J' || ccc == 'B'
|| ccc == 'I') {
/* JIS, decided!! */
kCode = JIS;
break;
}
else {
unGetc2(cc);
unGetc2(ccc);
--chksize;
continue;
}
}
else if(cc == ')') {
if((ccc = fGetc2(fp)) == EOF) {
/* Is the file corrupted? */
unGetc2(cc);
--chksize;
continue;
}
else if(ccc == 'I') {
/* CTEXT, decided!! */
kCode = CT;
break;
}
else {
unGetc2(cc);
unGetc2(ccc);
--chksize;
continue;
}
}
else if(cc == '-') {
if((ccc = fGetc2(fp)) == EOF) {
/* Is the file corrupted? */
unGetc2(cc);
--chksize;
continue;
}
else if(ccc == 'A') {
/* CTEXT, decided!! */
kCode = CT;
break;
}
else {
unGetc2(cc);
unGetc2(ccc);
--chksize;
continue;
}
}
else {
unGetc2(cc);
--chksize;
continue;
}
}
else if(c == SO || c == SI) {
/* JIS, decided!! */
kCode = JIS;
break;
}
else if(codeCheckOnly)
/* plain byte: forget it, nothing will be converted */
--ungetCount2;
else {
if(kCodeMaybe == UNKNOWN) {
/* no hint yet: write the byte now and drop it from the buffer */
putchar(c);
--ungetCount2;
}
else
--chksize;
}
}
/* from here on fGetc replays the recorded bytes from the start */
ungetCount = 0;
if(kCode == UNKNOWN)
return(kCodeMaybe);
else
return(kCode);
}
/* Stream convert main routine.
 *
 * fName     : name printed in front of the result in code-check-only
 *             mode, or NULL to print no name.
 * fp        : input stream.
 * errorLine : receives the number of the first line that could not be
 *             converted cleanly, or 0 when there was none.
 *
 * When kanjiInCode is UNKNOWN the code is detected with CodeCheck and
 * the writer is selected with SetKanjiOutFunc.  Every character is
 * then handed to (*kanjiOutFunc)(c1, c2): c1 is 0 for a single byte,
 * and (0, 0) is sent at the end for JIS and ctext output.
 *
 * Returns 1 on success.  Returns 0 when a character could not be
 * converted, when the input ended in the middle of a character, or
 * when the illegal sequence ESC ( H was found (getIllegalSeq is set).
 *
 * Bytes that were read ahead are given back with unGetc in reverse
 * order of reading: in stack mode fGetc pops the last pushed byte
 * first, so this is what restores the original order.
 */
int ConvertMain(fName, fp, errorLine)
char *fName;
FILE *fp;
int *errorLine;
{
    register int c, cc, ccc, cccc;
    int failFlag = 1;       /* if there is the character that
                             * could not convert, this is cleared by zero.*/
    int kanjiInFlag = 0;    /* use for JIS. */
    int line = 1;           /* current line. */

    getIllegalSeq = 0;
    *errorLine = 0;         /* first found ambiguous line. */
    if(kanjiInCode == UNKNOWN) {
        kanjiInCode = CodeCheck(fp);
        if(codeCheckOnly) {
            if(fName)
                printf("%s : ", fName);
            switch(kanjiInCode) {
            case ILLEGAL: printf("ILLEGAL\n");
                getIllegalSeq = 1; return(0);
            case UNKNOWN: printf("UNKNOWN\n"); break;
            case JIS: printf("JIS\n"); break;
            case SJIS: printf("SJIS\n"); break;
            case EUC: printf("EUC\n"); break;
            case CT: printf("CT\n"); break;
            }
            return(1);
        }
        else if(kanjiInCode == ILLEGAL) {
            getIllegalSeq = 1;
            return(0);
        }
#ifdef DEBUG
        else
            fprintf(stderr, "\n");
#endif
        SetKanjiOutFunc();
    }
    while((c = fGetc(fp)) != EOF) {
        if(c > 0x7f) {
            switch(kanjiInCode) {
            case EUC:
                if(isEkanji(c)) {
                    /* if euc 1'st byte. */
                    if((cc = fGetc(fp)) == EOF) {
                        (*kanjiOutFunc)(0, c);
                        setFirstErrorLine(errorLine, line);
                        return(0);
                    }
                    else if(isEkanji2(cc)) {
                        /* if 2'nd byte is euc too !
                         */
                        (*kanjiOutFunc)(c, cc);
                    }
                    else {
                        /* if i can't understand 2'nd byte.
                         */
                        (*kanjiOutFunc)(0, c);
                        failFlag = 0;
                        unGetc(cc);
                        setFirstErrorLine(errorLine, line);
                    }
                }
                else if(isEkana1(c)) {
                    /* if euc kana 1'st byte. */
                    if((cc = fGetc(fp)) == EOF) {
                        (*kanjiOutFunc)(0, c);
                        setFirstErrorLine(errorLine, line);
                        return(0);
                    }
                    else if(isEkana2(cc))
                        (*kanjiOutFunc)(c, cc);
                    else {
                        /* if i can't understand 2'nd byte.
                         */
                        (*kanjiOutFunc)(0, c);
                        failFlag = 0;
                        unGetc(cc);
                        setFirstErrorLine(errorLine, line);
                    }
                }
                else {
                    /* neither a kanji lead byte nor the kana
                     * prefix: pass it through and report it, as
                     * the SJIS and JIS cases do, instead of
                     * dropping the byte silently.
                     */
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    setFirstErrorLine(errorLine, line);
                }
                break;
            case SJIS:
                if(isSkanji(c)) {
                    /* if shift jis 1'st byte. */
                    if((cc = fGetc(fp)) == EOF) {
                        (*kanjiOutFunc)(0, c);
                        setFirstErrorLine(errorLine, line);
                        return(0);
                    }
                    else if(isSkanji2(cc)) {
                        /* if 2'nd byte is shift jis too !
                         */
                        (*kanjiOutFunc)(c, cc);
                    }
                    else {
                        /* if i can't understand 2'nd byte.
                         */
                        (*kanjiOutFunc)(0, c);
                        failFlag = 0;
                        unGetc(cc);
                        setFirstErrorLine(errorLine, line);
                    }
                }
                else if(iskana(c)) {
                    /* if 8 bit kana 1'st byte.*/
                    (*kanjiOutFunc)(0, c);
                }
                else {
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    setFirstErrorLine(errorLine, line);
                }
                break;
            case JIS:
            case CT:
                if(iskana(c)) {
                    /* if 8 bit kana 1'st byte.*/
                    (*kanjiOutFunc)(0, c);
                }
                else {
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    setFirstErrorLine(errorLine, line);
                }
                break;
            }
        }
        else if(c == ESC) {
            if((cc = fGetc(fp)) == EOF) {
                (*kanjiOutFunc)(0, c);
                setFirstErrorLine(errorLine, line);
                return(0);
            }
            else if(cc == '$') {
                if((ccc = fGetc(fp)) == EOF) {
                    /* if i can't get complete
                     * jis sequence.
                     */
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
                else if(ccc == '@' || ccc == 'B') {
                    /* jis kanji code ! */
                    kanjiInFlag = 1;
                    kanaInFlag = 0;
                }
                else if(ccc == '(') {
                    if((cccc = fGetc(fp)) == EOF) {
                        /* if i can't get complete
                         * ctext sequence.
                         */
                        (*kanjiOutFunc)(0, c);
                        failFlag = 0;
                        unGetc(ccc);
                        unGetc(cc);
                        setFirstErrorLine(errorLine,
                                          line);
                    }
                    else if(cccc == 'B') {
                        /* ctext code ! */
                        kanjiInFlag = 1;
                        kanaInFlag = 0;
                    }
                    else {
                        /* unknown sequence: write ESC and
                         * re-read the rest as plain bytes.
                         */
                        (*kanjiOutFunc)(0, ESC);
                        failFlag = 0;
                        unGetc(cccc);
                        unGetc(ccc);
                        unGetc(cc);
                        setFirstErrorLine(errorLine,
                                          line);
                    }
                }
                else {
                    (*kanjiOutFunc)(0, ESC);
                    failFlag = 0;
                    unGetc(ccc);
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
            }
            else if(cc == '(') {
                if((ccc = fGetc(fp)) == EOF) {
                    /* if i can't get complete
                     * jis sequence.
                     */
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
                else if(ccc == 'H') {
                    /* ESC(H is a mistaken sequence;
                     * it really designates the Swedish
                     * character set.
                     */
                    failFlag = 0;
                    getIllegalSeq = 1;
                    setFirstErrorLine(errorLine, line);
                    kanaInFlag = kanjiInFlag = 0;
                }
                else if(ccc == 'J' || ccc == 'B') {
                    /* jis code ! */
                    kanaInFlag = kanjiInFlag = 0;
                }
                else if(ccc == 'I') {
                    /* jis code ! */
                    kanjiInFlag = 0;
                    kanaInFlag = 1;
                }
                else {
                    (*kanjiOutFunc)(0, ESC);
                    failFlag = 0;
                    unGetc(ccc);
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
            }
            else if(cc == ')') {
                if((ccc = fGetc(fp)) == EOF) {
                    /* if i can't get complete
                     * ctext sequence.
                     */
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
                else if(ccc == 'I') {
                    /* ctext code ! */
                    /* right half of JISX0201 to G1 */
                }
                else {
                    (*kanjiOutFunc)(0, ESC);
                    failFlag = 0;
                    unGetc(ccc);
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
            }
            else if(cc == '-') {
                if((ccc = fGetc(fp)) == EOF) {
                    /* if i can't get complete
                     * ctext sequence.
                     */
                    (*kanjiOutFunc)(0, c);
                    failFlag = 0;
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
                else if(ccc == 'A') {
                    /* ctext code ! */
                    kanjiInFlag = kanaInFlag = 0;
                }
                else {
                    (*kanjiOutFunc)(0, ESC);
                    failFlag = 0;
                    unGetc(ccc);
                    unGetc(cc);
                    setFirstErrorLine(errorLine, line);
                }
            }
            else {
                (*kanjiOutFunc)(0, ESC);
                failFlag = 0;
                unGetc(cc);
                setFirstErrorLine(errorLine, line);
            }
        }
        else if(kanjiInFlag && c > 0x20) {
            /* if JIS kanji code and kanji mode in.
             */
            if((cc = fGetc(fp)) == EOF) {
                (*kanjiOutFunc)(0, c);
                setFirstErrorLine(errorLine, line);
                return(0);
            }
            else
                (*kanjiOutFunc)(c, cc);
        }
        else if(c == SO) {
            /* jis code ! */
            kanaInFlag = 1;
        }
        else if(c == SI) {
            /* jis code ! */
            kanaInFlag = 0;
        }
        else {
            /* ank or control character.
             */
            (*kanjiOutFunc)(0, c);
            if(c == '\n')
                ++line;
        }
    }
    if(kanjiOutCode == JIS2 || kanjiOutCode == JIS3 ||
       kanjiOutCode == JIS4 || kanjiOutCode == CT)
        /* cleanup: let the writer return the output to ASCII */
        (*kanjiOutFunc)(0, 0);
    return(failFlag);
}
/* Set kanjiInCode from an input-code option letter.
 * 'J', 'j' and 'o' all select JIS; 'c' ctext; 's' shift jis;
 * 'e' euc.  Returns 1 on success, 0 for an unknown letter (in
 * which case kanjiInCode is left unchanged).
 */
ParseInOptionSub(c)
char c;
{
    if (c == 'J' || c == 'j' || c == 'o')
        kanjiInCode = JIS;
    else if (c == 'c')
        kanjiInCode = CT;
    else if (c == 's')
        kanjiInCode = SJIS;
    else if (c == 'e')
        kanjiInCode = EUC;
    else
        return(0);

    return(1);
}
/* Set the output code from an option letter and select the writer
 * that matches the current kanjiInCode.
 *
 *   'j' JIS4 (7-bit kana)    'J' JIS3    'o' JIS2 (7-bit kana)
 *   'c' ctext                's' shift jis    'e' euc
 *
 * bit7 is set for 'j' and 'o' and cleared for every other letter,
 * so a 7-bit setting from an earlier option (for example from the
 * KC environment variable) does not leak into a later 'J'.
 *
 * With kanjiInCode UNKNOWN the pass-through writer NoConv is
 * installed; kanjiOutFunc is left alone for any other unhandled
 * input code.
 *
 * Returns 1 on success, 0 for an unknown letter (nothing is changed
 * in that case).
 */
int ParseOutOptionSub(c)
char c;
{
    void (*fromJis)(), (*fromCt)(), (*fromSjis)(), (*fromEuc)();
    int outCode;
    int sevenBit = 0;

    switch(c) {
    case 'j': outCode = JIS4; sevenBit = 1; break;
    case 'J': outCode = JIS3; break;
    case 'o': outCode = JIS2; sevenBit = 1; break;
    case 'c': outCode = CT; break;
    case 's': outCode = SJIS; break;
    case 'e': outCode = EUC; break;
    default : return(0);
    }

    /* one writer per input code for the chosen output family */
    switch(outCode) {
    case CT:
        fromJis = JtoC; fromCt = CtoC; fromSjis = StoC; fromEuc = EtoC;
        break;
    case SJIS:
        fromJis = JtoS; fromCt = CtoS; fromSjis = StoS; fromEuc = EtoS;
        break;
    case EUC:
        fromJis = JtoE; fromCt = CtoE; fromSjis = StoE; fromEuc = EtoE;
        break;
    default:    /* JIS2, JIS3, JIS4 */
        fromJis = JtoJ; fromCt = CtoJ; fromSjis = StoJ; fromEuc = EtoJ;
        break;
    }

    kanjiOutCode = outCode;
    bit7 = sevenBit;

    switch(kanjiInCode) {
    case UNKNOWN: kanjiOutFunc = NoConv;   break;
    case JIS:     kanjiOutFunc = fromJis;  break;
    case CT:      kanjiOutFunc = fromCt;   break;
    case SJIS:    kanjiOutFunc = fromSjis; break;
    case EUC:     kanjiOutFunc = fromEuc;  break;
    }
    return(1);
}
/* Parse one option string (the text after the leading '-').
 *
 *   bN / bNk : code check size N (times 1024 with the 'k' suffix)
 *   k        : code check only
 *   f        : unbuffered standard output
 *   v        : verbose mode
 *   X        : output code letter
 *   XY       : input code letter X, output code letter Y
 *
 * Returns 1 when the option was accepted, 0 otherwise.  An empty
 * string (a bare "-") is rejected up front so that opt[1] is never
 * read past the terminator.  The check size must be a positive
 * decimal number that still fits in an int after scaling.
 */
int ParseOption(opt)
char *opt;
{
    if(opt[0] == '\0')
        return(0);

    if(opt[0] == 'b') {
        char *end;
        long n;
        int kilo;

        n = strtol(opt + 1, &end, 10);
        if(end == opt + 1)
            return(0);              /* no digits at all */
        kilo = (end[0] == 'k' && end[1] == '\0');
        if(!kilo && end[0] != '\0')
            return(0);              /* trailing garbage */
        if(n <= 0 || n > (kilo ? INT_MAX / 1024 : INT_MAX))
            return(0);              /* not positive, or too large */
        bufSize = (int)(kilo ? n * 1024 : n);
        return(1);
    }

    if(opt[1] == '\0') {
        switch(opt[0]) {
        case 'k':
            codeCheckOnly = 1;
            return(1);
        case 'f':
            setbuf(stdout, NULL);
            return(1);
        case 'v':
            verboseMode = 1;
            return(1);
        default:
            /* if appointed output kanji code only.
             */
            return(ParseOutOptionSub(opt[0]));
        }
    }

    if(opt[2] == '\0') {
        /* if appointed input and output kanji code.
         */
        if(!ParseInOptionSub(opt[0]))
            return(0);
        return(ParseOutOptionSub(opt[1]));
    }

    return(0);
}
#ifdef MSDOS
/* Return a pointer to the file-name part of a path string:
 * the text after the last backslash if there is one, else the text
 * after the last colon, else the whole string.
 */
char *GetBaseName(name)
char *name;
{
#if defined(_MSC_VER) && _MSC_VER >= 800
    char *_mbsrchr();
#else
    char *jstrrchr();
#endif
    char *strrchr();
    char *lastSlash, *lastColon;

#if defined(_MSC_VER) && _MSC_VER >= 800
    lastSlash = _mbsrchr(name, '\\');
#else
    lastSlash = jstrrchr(name, '\\');
#endif
    lastColon = strrchr(name, ':');

    if(lastSlash != NULL)
        return(lastSlash + 1);
    if(lastColon != NULL)
        return(lastColon + 1);
    return(name);
}
#endif
/* Print the usage text to stderr and terminate with exit status 1.
 * prog is the program name shown in the usage line; under MSDOS
 * its directory part is removed first.
 */
void Usage(prog)
char *prog;
{
    static char *optionHelp[] = {
        " -k : code check only(not convert)\n",
        " -bXX[k] : code check size(k:kilo-byte,default=1024)\n",
        " -f : non buffering standard output\n",
        " -v : verbose mode\n",
        " kanjicode\n",
        "\t s : shift jis\n",
        "\t e : euc\n",
        "\t j : jis { ESC$B [kanji'83], ESC(B [ascii], ESC(I [kana(7bit)] }\n",
        "\t J : jis { ESC$B [kanji'83], ESC(J [roman] }\n",
        "\t o : oldjis { ESC$@ [kanji'78], ESC(J [ascii], ESC(I [kana(7bit)] }\n",
        "\t c : ctext { ESC$(B [kanji'83], ESC(B [ascii], ESC)I [kana(8bit)] }\n"
    };
    int i;
    int count = (int)(sizeof optionHelp / sizeof optionHelp[0]);

#ifdef MSDOS
    /* remove path name string.
     */
    prog = GetBaseName(prog);
#endif
    fputs("<< Kanji convert program. (Version 1.8 by s.mukawa) >>\n", stderr);
    fprintf(stderr, "Usage %s [-k][-bXX][-f][-v][-[input kanjicode][output kanjicode]] [file(s)]\n", prog);
    for(i = 0; i < count; ++i)
        fputs(optionHelp[i], stderr);
    exit(1);
}
/* Report a failed conversion on stderr.
 *
 * name : file name to show, or NULL for none.
 * ln   : number of the first line that could not be converted.
 *
 * When the illegal sequence ESC ( H was found this is always
 * reported, with a hint when it stopped automatic code detection.
 * Any other failure is reported only in verbose mode.
 */
void SorryMessage(name, ln)
char *name;
int ln;
{
    if(getIllegalSeq) {
        fprintf(stderr, "This input stream has ILLEGAL escape sequence [ ESC(H ]!!!!\n");
        if(kanjiInCode == ILLEGAL)
            fprintf(stderr, "But, You can convert, if you dare to specify input kanji code !!\n");
        return;
    }

    if(!verboseMode)
        return;

    fprintf(stderr, "Sorry, i found mysterious character(s).");
    if(name)
        fprintf(stderr, "<%s : %d line>\n", name, ln);
    else
        fprintf(stderr, "<%d line>\n", ln);
    fprintf(stderr, "BUT almost completely.\n");
}
main(ac, av)
int ac;
char *av[];
{
int i, line, exitStatus = 0;
FILE *fp;
char *env, *getenv();
/* get environment value.
*/
env = getenv("KC");
if(env != NULL) {
char *tkn, *strtok();
tkn = strtok(env, " ");
while(tkn != NULL) {
if(*tkn == '-')
if(!ParseOption(tkn+1))
Usage(*av);
tkn = strtok(NULL, " ");
}
}
/* check option switch.
*/
for(i = 1; i < ac; ++i)
if(av[i][0] == '-') {
if(!ParseOption(av[i]+1))
Usage(*av);
}
else
break;
ungetBuf = (int *)calloc(bufSize, sizeof(int));
if(ungetBuf == NULL) {
fprintf(stderr, "I can't allocate memory !!\n");
exit(1);
}
/* if put NO option, set default function to JtoS().
* (convertion jis to shift jis convert)
*/
if(kanjiOutFunc == NULL)
kanjiOutFunc = JtoS;
if(i == ac) {
/* read from standard input.
*/
if(!ConvertMain(NULL, stdin, &line)) {
SorryMessage(NULL, line);
exitStatus |= 1;
}
}
else {
int onlyOneFile = 0;
if(i + 1 == ac)
onlyOneFile = 1;
for(; i < ac; ++i)
if((fp = fopen(av[i], "r")) != NULL) {
if(!ConvertMain(onlyOneFile ? NULL : av[i],
fp, &line)) {
SorryMessage(av[i], line);
exitStatus |= 1;
}
(void)fclose(fp);
kanjiInCode = UNKNOWN;
}
else {
if(verboseMode)
fprintf(stderr,
"I can't read.(%s)\n", av[i]);
exitStatus |= 1;
}
}
free(ungetBuf);
/* process exit status.
* 0 := sucess
* 1 := fail
*/
exit(exitStatus);
}